***neighborhood-level social capital and depression after the outbreak of the COVID-19 pandemic: the mediating role of confidence in future***

***Part 1: depression score**
* ---- Session setup: load the data file and switch to the output folder ----
clear
set more off
use "D:\Data\cfps_20+2018_depression"
cd "D:\Output"

* ---- Preparing dataset ----
tabulate year
drop if year == .

* Wave index used as the panel time variable: 1 for 2018, 2 for 2020;
* any other year value leaves time missing.
generate time = cond(year == 2018, 1, cond(year == 2020, 2, .))
tabulate time year

* Declare the panel so that the lag operator returns the value at time-1
* for the same pid, then store that value next to the current one.
xtset pid time
foreach item of varlist qn406 qn407 qn411 qn412 qn414 qn416 qn418 qn420 qn12016 {
	generate lag_`item' = L.`item'
}
tab1 lag_qn406-lag_qn420 qn12016
tabulate lag_qn406 time

* The wave index is no longer needed; remove it together with the 2018 rows
drop time
drop if year == 2018

* ---- Data analysis ----
* ---- Demographic covariates ----

* Gender
tabulate gender
tabulate gender, nolabel

* Age: six bands; codes -8 through 15 are set to missing in age_group
tabulate age
codebook age
recode age (-8/15 = .) (16/29 = 1 "16-29") (30/39 = 2 "30-39") ///
           (40/49 = 3 "40-49") (50/59 = 4 "50-59") (60/69 = 5 "60-69") ///
           (70/104 = 6 "70 or above"), generate(age_group)
tabulate age_group  /* 26,386 persons >=16 years */

* Keep adults only
drop if age < 16
tabulate gender

* Ethnicity: code 1 -> 1 (Han), codes 2-79 -> 0 (non-Han), -8 -> missing
tabulate ethnicity
tabulate ethnicity, nolabel
recode ethnicity (1 = 1 "Han") (2/79 = 0 "non-Han") (-8 = .), generate(eth_new)
tabulate eth_new

* Marital status: codes 1, 4 and 5 -> 0, codes 2 and 3 -> 1, -8 -> missing
tabulate marriage
codebook marriage
recode marriage (-8 = .) (1 4/5 = 0 "未婚/离婚/丧偶") (2/3 = 1 "已婚"), generate(mrrge_new)
tabulate mrrge_new

* ---- Socioeconomic covariates ----

* Education (taken from the CFPS cross-year database, version 20):
* codes 5-8 are pooled into the top category, -9/-8 become missing
tabulate edu
tabulate edu, nolabel
recode edu (-9/-8 = .) ///
           (1 = 1 "no formal education (Illiterate /semi-literate") ///
           (2 = 2 "primary school") ///
           (3 = 3 "junior high school") ///
           (4 = 4 "senior high school") ///
           (5/8 = 5 "college or above"), generate(edu_new)
tabulate edu_new

* Employment (variable taken from the cross-year core-variable database)
tabulate employ
codebook employ

* Start from the non-negative employ codes; negative codes are left missing
generate new_employ = .
replace new_employ = employ if employ >= 0 & employ != .

* Where employ has no valid code, fall back on qc4:
*   qc4 == 1 -> new_employ = 4 (full-time students are coded as students)
*   qc4 == 5 -> new_employ = 1 (students who also work are coded as employed)
tabulate qc4
codebook qc4
replace new_employ = 4 if qc4 == 1 & (employ < 0 | employ == .)
replace new_employ = 1 if qc4 == 5 & (employ == . | employ < 0)

* Binary indicator: codes 0, 3 and 4 -> 0, code 1 -> 1; any other code is
* carried over unchanged by recode
recode new_employ (0 = 0 "unemployed/students/not in labor market") (3/4 = 0) ///
                  (1 = 1 "employed"), generate(new_employ_20)
tabulate new_employ_20

* Family size
summarize familysize

* Household income
summarize fincome1
codebook fincome1

* Equivalized household income: income divided by the square root of
* family size, then log-transformed after adding 1
generate eq_hhd_inc = fincome1 / sqrt(familysize)
summarize eq_hhd_inc
histogram eq_hhd_inc

generate ln_eq_hhd_inc = ln(eq_hhd_inc + 1)
summarize ln_eq_hhd_inc
histogram ln_eq_hhd_inc

* ---- Lifestyle covariates ----

* Smoking last month: negative codes become missing
tabulate qq201
tabulate qq201, nolabel
tabulate qq201 year
recode qq201 (-9/-1 = .) (0 = 0 "No") (1 = 1 "Yes"), generate(smoking)
tabulate smoking

* Drinking more than 3 times per week last month
tabulate qq301
tabulate qq301, nolabel
tabulate qq301 year, nolabel
recode qq301 (-9/-1 = .) (0 = 0 "No") (1 = 1 "Yes"), generate(drinking)
tabulate drinking, missing

* Exercise (2020): source code 8 becomes the lowest category, codes 1-2 and
* 5-7 are pooled, negative codes become missing
tabulate qp701n
tabulate qp701n, nolabel
recode qp701n (-9/-1 = .) ///
              (8 = 1 "never") ///
              (1/2 = 2 "less than once per week") ///
              (3 = 3 "once or twice per week") ///
              (4 = 4 "3-4 times per week") ///
              (5/7 = 5 "5 times or more per week"), generate(exercise)
tabulate exercise

* ---- Rural-urban: codes -9/-8 are set to missing in place ----
tabulate urban
tabulate urban, nolabel
recode urban (-9/-8 = .)
tabulate urban

* ---- Trust in neighbors: cleaned copy stored as newqn10022 ----
tabulate qn10022
tabulate qn10022, nolabel
recode qn10022 (-8/-1 = .), prefix(new)
tabulate newqn10022

* ---- Vote: source code 1 -> 1 (yes), 5 -> 0 (no), negative codes -> missing ----
tabulate qn7
tabulate qn7, nolabel
recode qn7 (-8/-1 = .) (1 = 1 "yes") (5 = 0 "no"), generate(vote)
tabulate vote

* ---- CES-D 8 (current and lagged items) ----
tab1 qn406 qn407 qn411 qn412 qn414 qn416 qn418 qn420 lag_qn406-lag_qn420

* Shift every item from 1-4 to 0-3 and set negative codes to missing;
* the cleaned copies carry the prefix "new"
recode qn406 qn407 qn411 qn412 qn414 qn416 qn418 qn420 lag_qn406-lag_qn420 ///
       (-9/-1 = .) (1 = 0) (2 = 1) (3 = 2) (4 = 3), prefix(new)

* Reverse scoring of items 412 and 416, current and lagged
recode newqn412 newqn416 newlag_qn412 newlag_qn416 (0 = 3) (1 = 2) (2 = 1) (3 = 0)

* Total scores: a plain sum, so a missing item makes the total missing
generate cesd8_new = newqn406 + newqn407 + newqn411 + newqn412 ///
                   + newqn414 + newqn416 + newqn418 + newqn420
summarize cesd8_new

generate lag_cesd8_new = newlag_qn406 + newlag_qn407 + newlag_qn411 + newlag_qn412 ///
                       + newlag_qn414 + newlag_qn416 + newlag_qn418 + newlag_qn420
summarize lag_cesd8_new

* Dichotomized with 9 as the cutoff
* https://doi.org/10.1007/s41999-017-0016-0
recode cesd8_new (9/24 = 1 "depressed") (0/8 = 0 "non-depressed"), generate(cesd8_dummy)
tabulate cesd8_dummy

recode lag_cesd8_new (9/24 = 1 "depressed") (0/8 = 0 "non-depressed"), generate(lag_cesd8_dummy)
tabulate lag_cesd8_dummy

* ---- Physical discomfort during the past two weeks ----
tabulate qp301
recode qp301 (-8/-1 = .) (0 = 0 "No") (1 = 1 "Yes"), generate(phy_hlth)
tabulate phy_hlth

* ---- Confidence about the future (current and lagged) ----
tab1 qn12016 lag_qn12016
tab1 qn12016 lag_qn12016, nolabel
recode qn12016 (-8/-1 = .), generate(cfdc)
recode lag_qn12016 (-8/-1 = .), generate(lag_cfdc)
tab1 cfdc lag_cfdc

* ---- Neighborhood identifier (cid20) and analytic sample ----
tabulate cid20  /* 22,472 */

* Number of records sharing each cid20 value
generate n = 1
bysort cid20: egen person_cid = total(n)
tabulate person_cid

* Flag records whose cid20 group holds 25 to 3,913 records.
* NOTE(review): the 3,914 ceiling presumably excludes the pooled group of
* records without a valid cid20 (see the value label below) -- confirm
* against the person_cid tabulation if the input data change.
generate cid_sample = inrange(person_cid, 25, 3913)
tabulate cid_sample
label define cid_sample 1 ">=25 persons in cid" 0 "<25 persons in cid or cid missing"
label values cid_sample cid_sample
tabulate cid_sample  /* 14,201 persons living in neighborhoods >=25 persons */

tabulate subsample
tabulate cid_sample if subsample == 1

* Missing-data summary for the analytic sample, exported to Word
logout, save(missing_data_imputated) word replace: ///
    mdesc gender age_group eth_new mrrge_new edu_new new_employ_20 smoking drinking exercise ln_eq_hhd_inc newqn10022 region urban cesd8_new lag_cesd8_new vote cid20 cfdc lag_cfdc phy_hlth if cid_sample==1

* Restrict the data to the analytic sample
keep if cid_sample == 1
codebook cid20

* Frequency tables: with missing (Word), without missing (Word and Excel)
logout, save(categorical_imputated) word replace: ///
    tab1 cesd8_dummy gender age_group eth_new mrrge_new edu_new new_employ_20 smoking drinking exercise phy_hlth lag_cesd8_dummy vote cmonth urban, missing

logout, save(categorical_imputated_non_missing) word replace: ///
    tab1 cesd8_dummy gender age_group eth_new mrrge_new edu_new new_employ_20 smoking drinking exercise phy_hlth lag_cesd8_dummy vote cmonth urban

logout, save(categorical_imputated_non_missing) excel replace: ///
    tab1 cesd8_dummy gender age_group eth_new mrrge_new edu_new new_employ_20 smoking drinking exercise phy_hlth lag_cesd8_dummy vote cmonth urban

* Summary statistics for the continuous variables
logout, save(continuous_imputated) word replace: ///
    summarize cesd8_new lag_cesd8_new ln_eq_hhd_inc eq_hhd_inc cfdc lag_cfdc newqn10022

* Cronbach's alpha for the current and the lagged item sets
alpha newqn406 newqn407 newqn411 newqn412 newqn414 newqn416 newqn418 newqn420  /* 0.7686 */

alpha newlag_qn406 newlag_qn407 newlag_qn411 newlag_qn412 newlag_qn414 newlag_qn416 newlag_qn418 newlag_qn420  /* 0.7621 */

* Individual-level controls shared by the Part 1 (depression score) models
global ctrl "gender i.age_group eth_new mrrge_new i.edu_new new_employ_20 smoking drinking  i.exercise phy_hlth lag_cesd8_new"

* ---- Multiple imputation by chained equations (10 imputations) ----
xtset, clear

* flong style, not mlong: the neighborhood aggregates built below are
* computed from imputed values of all residents of a cid, so they differ
* across imputations even for respondents with complete data. mlong keeps
* complete observations only once (in m=0) and therefore cannot hold such
* super-varying variables; flong keeps a full copy of the data for every m.
mi set flong
mi register imputed gender age_group eth_new mrrge_new edu_new new_employ_20 ///
    smoking drinking exercise ln_eq_hhd_inc vote newqn10022 region urban ///
    cesd8_new lag_cesd8_new cid20 cfdc lag_cfdc phy_hlth ///
    cesd8_dummy lag_cesd8_dummy cmonth

* Binary variables use logit, ordered categories ologit, continuous scores
* linear regression. The trace of the chains is saved to impu_trace.
mi impute chained ///
    (logit)   gender ///
    (ologit)  age_group ///
    (logit)   eth_new ///
    (logit)   mrrge_new ///
    (ologit)  edu_new ///
    (logit)   new_employ_20 ///
    (logit)   smoking ///
    (logit)   drinking ///
    (ologit)  exercise ///
    (regress) ln_eq_hhd_inc ///
    (logit)   vote ///
    (regress) newqn10022 ///
    (logit)   urban ///
    (regress) cesd8_new ///
    (regress) lag_cesd8_new ///
    (regress) cfdc ///
    (regress) lag_cfdc ///
    (logit)   phy_hlth, ///
    add(10) rseed(53421) dots savetrace(impu_trace, replace)

* ---- Neighborhood-level social capital ----
* Built separately inside every imputation (mi xeq) and deliberately left
* unregistered, because the values vary across m for all observations.

* Voters per cid and their share (in percent) of the cid head count
mi xeq: bysort cid20: egen vote_cid_persons = total(vote)
tabulate vote_cid_persons
mi xeq: bysort cid20: generate vote_cid_proportion = (vote_cid_persons/person_cid)*100
tabulate vote_cid_proportion

* Mean trust in neighbors per cid
mi xeq: bysort cid20: egen trust_nghb_cid = mean(newqn10022)
tabulate trust_nghb_cid

* ---- Sample characteristics of the neighborhood-level measures ----
* Tag one record per cid. Ordering by pid inside each cid makes the tagged
* record the same person in every imputation, so the estimation sample used
* by mi estimate does not change between imputations.
mi xeq: bysort cid20 (pid): generate m = _n
log using "neighborhood-level sc", replace
/* se = sd/sqrt(n); hence sd = se*sqrt(n) */
mi estimate, post: mean vote_cid_proportion trust_nghb_cid if m==1
log close
  
* ---- Two-level mediation, depression score (random intercept for cid20) ----

* Model a: depression score on the predictors, without the mediator cfdc
mi estimate, post: ///
    mixed cesd8_new ///
          vote newqn10022 vote_cid_proportion trust_nghb_cid ln_eq_hhd_inc ///
          $ctrl lag_cfdc i.cmonth urban ///
          || cid20:
estimates store a

* Model b: mediator (confidence about the future) on the same predictors
mi estimate, post: ///
    mixed cfdc ///
          vote newqn10022 vote_cid_proportion trust_nghb_cid ln_eq_hhd_inc ///
          $ctrl lag_cfdc i.cmonth urban ///
          || cid20:
estimates store b

* Model c: depression score on the mediator plus the same predictors
mi estimate, post: ///
    mixed cesd8_new cfdc ///
          vote newqn10022 vote_cid_proportion trust_nghb_cid ln_eq_hhd_inc ///
          $ctrl lag_cfdc i.cmonth urban ///
          || cid20:
estimates store c

* Export the three models side by side
esttab a b c using cesd_score_imputed.rtf, ///
    ci star( + 0.1 * 0.05 ** 0.01 *** 0.001) r2 b(%4.2f) pr2 aic bic onecell replace

* Indirect and total effect of neighborhood trust through cfdc, from a
* two-equation gsem with separate cid20 random intercepts (M1, M2) whose
* covariance is constrained to zero
log using "mediation_cesd_score_imputation", replace
mi estimate ///
    (ind_eff: _b[cfdc:trust_nghb_cid]*_b[cesd8_new:cfdc]) ///
    (total_eff: _b[cfdc:trust_nghb_cid]*_b[cesd8_new:cfdc]+_b[cesd8_new:trust_nghb_cid]), ///
    cmdok: ///
    gsem (cfdc <- vote newqn10022 vote_cid_proportion trust_nghb_cid ln_eq_hhd_inc ///
                  $ctrl lag_cfdc i.cmonth urban M1[cid20]) ///
         (cesd8_new <- cfdc vote newqn10022 vote_cid_proportion trust_nghb_cid ln_eq_hhd_inc ///
                  $ctrl lag_cfdc i.cmonth urban M2[cid20]), ///
         cov(M1[cid20]*M2[cid20]@0)
log close

***Part 2: depression dummy**
* Replace the imputed data with a run that imputes the binary depression
* indicators instead of the scores. cesd8_dummy and cesd8_new cannot enter
* the same imputation equation because of collinearity.
* The trace goes to its own file so that the trace saved by the earlier
* imputation run (impu_trace) is not overwritten.
mi impute chained ///
    (logit)   gender ///
    (ologit)  age_group ///
    (logit)   eth_new ///
    (logit)   mrrge_new ///
    (ologit)  edu_new ///
    (logit)   new_employ_20 ///
    (logit)   smoking ///
    (logit)   drinking ///
    (ologit)  exercise ///
    (regress) ln_eq_hhd_inc ///
    (logit)   vote ///
    (regress) newqn10022 ///
    (logit)   urban ///
    (logit)   cesd8_dummy ///
    (logit)   lag_cesd8_dummy ///
    (regress) cfdc ///
    (regress) lag_cfdc ///
    (logit)   phy_hlth, ///
    replace rseed(53421) dots savetrace(impu_trace_dummy, replace)

* The replace option overwrote the imputed values of vote and newqn10022,
* so the neighborhood aggregates derived from them are out of date.
* Rebuild them from the current imputations before fitting any model.
drop vote_cid_persons vote_cid_proportion trust_nghb_cid
mi xeq: bysort cid20: egen vote_cid_persons = total(vote)
mi xeq: bysort cid20: generate vote_cid_proportion = (vote_cid_persons/person_cid)*100
mi xeq: bysort cid20: egen trust_nghb_cid = mean(newqn10022)

* Individual-level controls for the binary-outcome models
global ctrl2 "gender i.age_group eth_new mrrge_new i.edu_new new_employ_20 smoking drinking i.exercise phy_hlth lag_cesd8_dummy"

* Model a: depression dummy on the predictors, without the mediator cfdc
mi estimate, cmdok post: melogit cesd8_dummy vote newqn10022 vote_cid_proportion trust_nghb_cid ln_eq_hhd_inc  ///
   $ctrl2 lag_cfdc i.cmonth urban || cid20:, or
estimates store a

* Model b: mediator (confidence about the future) on the same predictors
mi estimate, cmdok post: mixed cfdc vote newqn10022 vote_cid_proportion trust_nghb_cid ln_eq_hhd_inc  ///
   $ctrl2 lag_cfdc i.cmonth urban || cid20:
estimates store b

* Model c: depression dummy on the mediator plus the same predictors
mi estimate, cmdok post: melogit cesd8_dummy cfdc vote newqn10022 vote_cid_proportion trust_nghb_cid ln_eq_hhd_inc  ///
   $ctrl2 lag_cfdc i.cmonth urban || cid20:, or
estimates store c

* Export twice: raw coefficients, then exponentiated coefficients (eform)
esttab a b c ///
      using cesd_dummy_coef_imputed.rtf, ///
      ci star( + 0.1 * 0.05 ** 0.01 *** 0.001) r2 b(%4.2f) pr2 aic bic onecell replace
esttab a b c ///
      using cesd_dummy_or_imputed.rtf, ///
      eform ci star( + 0.1 * 0.05 ** 0.01 *** 0.001) r2 b(%4.2f) pr2 aic bic onecell replace

* Indirect and total effect of neighborhood trust through cfdc, from a
* two-equation gsem (linear mediator equation, logit outcome equation) with
* separate cid20 random intercepts whose covariance is constrained to zero
log using "mediation_cesd_dummy_imputation", replace
mi estimate (ind_eff: _b[cfdc:trust_nghb_cid]*_b[cesd8_dummy:cfdc]) ///
   (total_eff: _b[cfdc:trust_nghb_cid]*_b[cesd8_dummy:cfdc] + _b[cesd8_dummy:trust_nghb_cid]), cmdok: gsem (cfdc <- vote newqn10022 vote_cid_proportion trust_nghb_cid ln_eq_hhd_inc  ///
   $ctrl2 lag_cfdc i.cmonth urban  M1[cid20]) ///
    (cesd8_dummy <- cfdc vote newqn10022 vote_cid_proportion trust_nghb_cid ln_eq_hhd_inc  ///
   $ctrl2 lag_cfdc i.cmonth urban M2[cid20], logit), cov(M1[cid20]*M2[cid20]@0)
log close
 
